#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

from lingua_franca.time import now_local

from .parse_common import (is_numeric, look_for_fractions, Normalizer,
                           tokenize, Token)


def _find_numbers_in_text(tokens):
    """Map every token to the number it represents, in reverse order.

    Each entry of the result is a ``(number, token)`` tuple; ``number`` is
    ``None`` when the token does not yield a number. Two special cases add a
    synthetic unit token (index -1) so the duration parser sees a unit:

    * 'kvart' (a quarter) is followed by a synthetic 'timmar' token.
    * 'halvtimme' / 'halvtimma' becomes 30 plus a synthetic 'minuter' token.

    The list is returned reversed so that the "size" word (minutes/hours/
    etc.) comes first and the numbers belonging to it come afterwards.

    Args:
        tokens: Tokens to parse

    Returns:
        list of (number, token) tuples
    """
    forward = []
    for token in tokens:
        number = extract_number_sv(token.word)
        if number:
            forward.append((number, token))
            if token.word == 'kvart':
                # A bare quarter is a quarter of an hour
                forward.append((None, Token('timmar', index=-1)))
        elif token.word in ('halvtimme', 'halvtimma'):
            forward.append((30, token))
            forward.append((None, Token('minuter', index=-1)))
        else:
            forward.append((None, token))

    # Reversing the forward list gives the same order as repeatedly
    # inserting at the front.
    forward.reverse()
    return forward


def _combine_adjacent_numbers(number_map):
    """Combine adjacent numbers through multiplication.

    Walks through a number map and joins adjasent numbers to handle cases
    such as "en halvtimme" (one half hour).

    Returns:
        (list): simplified number_map
    """
    simplified = []
    skip = False
    for i in range(len(number_map) - 1):
        if skip:
            skip = False
            continue
        if number_map[i][0] and number_map[i + 1][0]:
            combined_number = number_map[i][0] * number_map[i + 1][0]
            combined_tokens = (number_map[i][1], number_map[i + 1][1])
            simplified.append((combined_number, combined_tokens))
            skip = True
        else:
            simplified.append((number_map[i][0], (number_map[i][1],)))

    if not skip:
        simplified.append((number_map[-1][0], (number_map[-1][1],)))
    return simplified


def extract_duration_sv(text):
    """
    Convert a Swedish phrase into a duration.

    The function handles durations from seconds up to days.

    Convert things like:
        "10 minuter"
        "3 dagar 8 timmar 10 minuter och 49 sekunder"
    into a timedelta.

    The words used in the duration will be consumed, and
    the remainder returned.

    As an example (assuming the tokenizer splits on whitespace),
    "ställ en timer på 5 minuter" would return
    (timedelta(minutes=5), "ställ en timer på").

    Args:
        text (str): string containing a duration

    Returns:
        (timedelta, str) or None:
                    A tuple containing the duration and the remaining text
                    not consumed in the parsing, joined with single
                    spaces. None is returned (not a tuple) when no
                    duration is found.
    """
    tokens = tokenize(text)
    number_tok_map = _find_numbers_in_text(tokens)
    # Combine adjacent numbers
    simplified = _combine_adjacent_numbers(number_tok_map)

    states = {
        'days': 0,
        'hours': 0,
        'minutes': 0,
        'seconds': 0
    }

    # Parser state, mapping words that should set the parser to collect
    # numbers to a specific time "size"
    state_words = {
        'days': ('dygn', 'dag', 'dagar', 'dags'),
        'hours': ('timmar', 'timme', 'timma', 'timmes', 'timmas'),
        'minutes': ('minuter', 'minuters', 'minut', 'minuts'),
        'seconds': ('sekunder', 'sekunders', 'sekund', 'sekunds')
    }
    # Must be a real tuple: with a plain string the membership test below
    # becomes a substring test and words such as "o" or "ch" would match.
    binding_words = ('och',)

    consumed = []
    state = None
    valid = False

    # The map is in reverse order, so the unit word is seen before the
    # number(s) that belong to it.
    for num, toks in simplified:
        if state and num:
            states[state] += num
            consumed.extend(toks)
            valid = True  # If a state field got set this is valid duration
        elif num is None:
            for s in state_words:
                if toks[0].word in state_words[s]:
                    state = s
                    consumed.extend(toks)
                    break
            else:
                # Any word other than a binding word ends the current unit
                if toks[0].word not in binding_words:
                    state = None

    td = timedelta(**states)
    remainder = ' '.join([t.word for t in tokens if t not in consumed])
    return (td, remainder) if valid else None


def extract_number_sv(text, short_scale=True, ordinals=False):
    """
    Extract the first number found in a Swedish text.

    Recognised forms are numeric strings, the ordinals "första".."sjätte",
    the cardinals "en"/"ett".."tio", fraction words accepted by
    is_fractional_sv, written fractions such as "2/3", a cardinal directly
    followed by a fraction word (multiplied together) and two numbers joined
    by "och" (added together).

    Args:
        text (str): the string to extract a number from
        short_scale (bool): unused, kept for API compatibility
        ordinals (bool): unused, kept for API compatibility
    Returns:
        (int) or (float) or False: The value of the extracted number, or
                                   False when no non-zero number is found
    """
    # TODO: short_scale and ordinals don't do anything here.
    # The parameters are present in the function signature for API
    # compatibility reasons.
    ordinal_values = {
        "första": 1, "andra": 2, "tredje": 3,
        "fjärde": 4, "femte": 5, "sjätte": 6,
    }
    cardinal_values = {
        "en": 1, "ett": 1, "två": 2, "tre": 3, "fyra": 4, "fem": 5,
        "sex": 6, "sju": 7, "åtta": 8, "nio": 9, "tio": 10,
    }

    words = text.lower().split()
    total = len(words)
    after_och = False    # True once "<number> och" has been seen
    before_och = False   # the value found in front of "och"
    val = False
    pos = 0
    while pos < total:
        word = words[pos]
        if is_numeric(word):
            val = float(word)
        elif word in ordinal_values:
            val = ordinal_values[word]
        else:
            fraction = is_fractional_sv(word)
            if fraction:
                val = fraction
            else:
                if word in cardinal_values:
                    val = cardinal_values[word]
                if val:
                    # "två halva": multiply with a directly following
                    # fraction word and consume it
                    following = words[pos + 1] if pos < total - 1 else ""
                    factor = is_fractional_sv(following)
                    if factor:
                        val = val * factor
                        words[pos + 1] = ""

        if not val:
            # look for fractions like "2/3"
            pieces = word.split('/')
            if look_for_fractions(pieces):
                val = float(pieces[0]) / float(pieces[1])
            elif after_och:
                # nothing usable after "och", keep the first value
                val = before_och
                break
            else:
                pos += 1
                continue

        words[pos] = ""

        if after_och:
            words[pos - 1] = ''  # remove "och"
            val += before_och
        elif pos + 1 < total and words[pos + 1] == 'och':
            after_och = True
            before_och = val
            val = False
            pos += 2
            continue
        elif pos + 2 < total and words[pos + 2] == 'och':
            after_och = True
            before_och = val
            val = False
            pos += 3
            continue

        break

    return val or False


def extract_datetime_sv(text, anchorDate=None, default_time=None):
    """Extract a date and/or time from a Swedish phrase.

    The text is scanned twice: first for date expressions (idag, imorgon,
    weekdays, "N dagar", "nästa vecka", month names, "från"/"efter" ...)
    and then for time expressions (middag, kväll, "10:45", ...). Words that
    were understood are removed from the text.

    Args:
        text (str): the phrase to parse
        anchorDate (datetime): the moment relative expressions are
            calculated from; now_local() is used when it is not given
        default_time (time): time of day to use when the phrase contains a
            date but no time

    Returns:
        [datetime, str] or None: the extracted datetime and the remaining
            text, or None when the text is empty or holds no date/time.

    Raises:
        ValueError: if the phrase names an impossible date such as
            "31 april"
    """
    def clean_string(s):
        """
            cleans the input string of unneeded punctuation and capitalization
            among other things.
        """
        s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
            .replace(' den ', ' ').replace(' en ', ' ')
        wordList = s.split()
        for idx, word in enumerate(wordList):
            word = word.replace("'s", "")

            ordinals = ["rd", "st", "nd", "th"]
            # [:1] instead of [0]: the word may be empty after the replace
            if word[:1].isdigit():
                for ordinal in ordinals:
                    if ordinal in word:
                        word = word.replace(ordinal, "")
            wordList[idx] = word

        return wordList

    def date_found():
        return found or \
            (
                datestr != "" or timeStr != "" or
                yearOffset != 0 or monthOffset != 0 or
                dayOffset is True or hrOffset != 0 or
                hrAbs or minOffset != 0 or
                minAbs or secOffset != 0
            )

    def leading_int(s):
        """Return the integer formed by the leading digits of s."""
        digits = ""
        for ch in s:
            if not ch.isdigit():
                break
            digits += ch
        return int(digits)

    if text == "":
        return None

    anchorDate = anchorDate or now_local()
    found = False
    daySpecified = False
    dayOffset = False
    monthOffset = 0
    yearOffset = 0
    dateNow = anchorDate
    today = dateNow.strftime("%w")
    fromFlag = False
    datestr = ""
    hasYear = False
    timeQualifier = ""

    timeQualifiersList = ['morgon', 'förmiddag', 'eftermiddag', 'kväll']
    markers = ['på', 'i', 'den här', 'kring', 'efter']
    days = ['måndag', 'tisdag', 'onsdag', 'torsdag',
            'fredag', 'lördag', 'söndag']
    months = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
              'juli', 'augusti', 'september', 'oktober', 'november',
              'december']
    monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july', 'aug',
                   'sept', 'oct', 'nov', 'dec']
    # words that may follow "från"/"efter"; built once, not per word
    validFollowups = days + months + monthsShort + \
        ["idag", "imorgon", "nästa", "förra", "nu"]

    words = clean_string(text)

    for idx, word in enumerate(words):
        if word == "":
            continue
        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""

        # this isn't in clean string because I don't want to save back to words
        word = word.rstrip('s')
        start = idx
        used = 0
        # An empty or non-numeric previous word must not crash the
        # "<number> <unit>" branches below.
        prevIsNumber = wordPrev.isdecimal()
        # save timequalifier for later
        if word in timeQualifiersList:
            timeQualifier = word
            # parse today, tomorrow, day after tomorrow
        elif word == "idag" and not fromFlag:
            dayOffset = 0
            used += 1
        elif word == "imorgon" and not fromFlag:
            dayOffset = 1
            used += 1
        elif word in ("morgondagen", "morgondagens") and not fromFlag:
            dayOffset = 1
            used += 1
        elif word == "övermorgon" and not fromFlag:
            dayOffset = 2
            used += 1
        # parse 5 days, 10 weeks, last week, next week
        elif word == "dag" or word == "dagar":
            if prevIsNumber:
                dayOffset += int(wordPrev)
                start -= 1
                used = 2
        elif word in ("vecka", "veckor") and not fromFlag:
            if prevIsNumber:
                dayOffset += int(wordPrev) * 7
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                dayOffset = 7
                start -= 1
                used = 2
            elif wordPrev == "förra":
                dayOffset = -7
                start -= 1
                used = 2
        # parse 10 months, next month, last month
        elif word == "månad" and not fromFlag:
            if prevIsNumber:
                monthOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                monthOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "förra":
                monthOffset = -1
                start -= 1
                used = 2
        # parse 5 years, next year, last year
        elif word == "år" and not fromFlag:
            if prevIsNumber:
                yearOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                yearOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "förra":
                yearOffset = -1
                start -= 1
                used = 2
        # parse Monday, Tuesday, etc., and next Monday,
        # last Tuesday, etc.
        elif word in days and not fromFlag:
            d = days.index(word)
            dayOffset = (d + 1) - int(today)
            used = 1
            if dayOffset < 0:
                dayOffset += 7
            if wordPrev == "nästa":
                dayOffset += 7
                used += 1
                start -= 1
            elif wordPrev == "förra":
                dayOffset -= 7
                used += 1
                start -= 1
        # parse 15 of July, June 20th, Feb 18, 19 of February
        elif (word in months or word in monthsShort) and not fromFlag:
            try:
                m = months.index(word)
            except ValueError:
                m = monthsShort.index(word)
            used += 1
            datestr = months[m]
            if wordPrev and (wordPrev[0].isdigit() or
                             (wordPrev == "of" and
                              wordPrevPrev[:1].isdigit())):
                if wordPrev == "of" and wordPrevPrev[:1].isdigit():
                    datestr += " " + wordPrevPrev
                    used += 1
                    start -= 1
                else:
                    datestr += " " + wordPrev
                start -= 1
                used += 1
                # only a plain number is a year; "10:45" is left for the
                # time parser
                if wordNext.isdecimal():
                    datestr += " " + wordNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False

            elif wordNext.isdecimal():
                datestr += " " + wordNext
                used += 1
                if wordNextNext.isdecimal():
                    datestr += " " + wordNextNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False
        # parse 5 days from tomorrow, 10 weeks from next thursday,
        # 2 months from July
        if (word == "från" or word == "efter") and wordNext in validFollowups:
            used = 2
            fromFlag = True
            if wordNext == "imorgon":
                dayOffset += 1
            elif wordNext in days:
                d = days.index(wordNext)
                tmpOffset = (d + 1) - int(today)
                used = 2
                if tmpOffset < 0:
                    tmpOffset += 7
                dayOffset += tmpOffset
            elif wordNextNext and wordNextNext in days:
                d = days.index(wordNextNext)
                tmpOffset = (d + 1) - int(today)
                used = 3
                if wordNext == "nästa":
                    tmpOffset += 7
                    used += 1
                    start -= 1
                elif wordNext == "förra":
                    tmpOffset -= 7
                    used += 1
                    start -= 1
                dayOffset += tmpOffset
        if used > 0:
            # >= 0 so that a leading "denna" (index 0) is consumed as well
            if start - 1 >= 0 and words[start - 1] == "denna":
                start -= 1
                used += 1

            for i in range(0, used):
                words[i + start] = ""

            if start - 1 >= 0 and words[start - 1] in markers:
                words[start - 1] = ""
            found = True
            daySpecified = True

    # parse time
    timeStr = ""  # never filled in; only read by date_found()
    hrOffset = 0
    minOffset = 0
    secOffset = 0
    hrAbs = None
    minAbs = None

    durationWords = ("halvtimme", "halvtimma", "kvart", "timme", "timma")
    # qualifiers that move a 12 hour clock time to the afternoon
    lateQualifiers = ("eftermiddag", "kväll", "afternoon", "evening")
    englishPeriods = {"morning": "am", "afternoon": "pm", "evening": "pm"}

    for idx, word in enumerate(words):
        if word == "":
            continue

        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
        # parse noon, midnight, morning, afternoon, evening
        used = 0
        if word == "middag":
            hrAbs = 12
            used += 1
        elif word == "midnatt":
            hrAbs = 0
            used += 1
        elif word == "morgon":
            if not hrAbs:
                hrAbs = 8
            used += 1
        elif word == "förmiddag":
            if not hrAbs:
                hrAbs = 10
            used += 1
        elif word == "eftermiddag":
            if not hrAbs:
                hrAbs = 15
            used += 1
        elif word == "kväll":
            if not hrAbs:
                hrAbs = 19
            used += 1
        # parse half an hour, quarter hour
        # Only the duration words themselves may be consumed here;
        # otherwise every word following a marker would be swallowed.
        elif word in durationWords and \
                (wordPrev in markers or wordPrevPrev in markers):
            if word == "halvtimme" or word == "halvtimma":
                minOffset = 30
            elif word == "kvart":
                minOffset = 15
            else:
                hrOffset = 1
            words[idx - 1] = ""
            used += 1
            hrAbs = -1
            minAbs = -1
        # parse 5:00 am, 12:00 p.m., etc
        elif word[0].isdigit():
            isTime = True
            strHH = ""
            strMM = ""
            remainder = ""
            if ':' in word:
                # parse colons
                # "3:00 in the morning"
                stage = 0
                pos = 0
                while pos < len(word):
                    ch = word[pos]
                    if stage == 0:
                        if ch.isdigit():
                            strHH += ch
                        elif ch == ":":
                            stage = 1
                        else:
                            # re-examine this character as remainder
                            stage = 2
                            continue
                    elif stage == 1:
                        if ch.isdigit():
                            strMM += ch
                        else:
                            stage = 2
                            continue
                    else:
                        remainder = word[pos:].replace(".", "")
                        break
                    pos += 1
                if remainder == "":
                    nextWord = wordNext.replace(".", "")
                    if nextWord == "am" or nextWord == "pm":
                        remainder = nextWord
                        used += 1
                    elif nextWord == "tonight":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "in" and wordNextNext == "the" and \
                            wordNextNextNext in englishPeriods:
                        remainder = englishPeriods[wordNextNextNext]
                        used += 3
                    elif wordNext == "in" and wordNextNext in englishPeriods:
                        remainder = englishPeriods[wordNextNext]
                        used += 2
                    elif wordNext == "this" and \
                            wordNextNext in englishPeriods:
                        remainder = englishPeriods[wordNextNext]
                        used = 2
                    elif wordNext == "at" and wordNextNext == "night":
                        # strHH is still a string here
                        if int(strHH) > 5:
                            remainder = "pm"
                        else:
                            remainder = "am"
                        used += 2
                    elif timeQualifier in lateQualifiers:
                        if int(strHH) < 12:
                            strHH = int(strHH) + 12
            else:
                # try to parse # s without colons
                # 5 hours, 10 minutes etc.
                strNum = "".join(ch for ch in word if ch.isdigit())
                remainder = "".join(ch for ch in word if not ch.isdigit())
                nextWord = wordNext.replace(".", "").strip()

                if remainder == "":
                    remainder = nextWord

                if remainder in ("pm", "p.m.") or nextWord == "pm":
                    strHH = strNum
                    remainder = "pm"
                    # the next word is only consumed when it is the marker
                    used = 1 if nextWord == "pm" else 0
                elif remainder in ("am", "a.m.") or nextWord == "am":
                    strHH = strNum
                    remainder = "am"
                    used = 1 if nextWord == "am" else 0
                elif not word.isdecimal():
                    # something like "5x": not a number int() can read
                    isTime = False
                elif int(word) > 100 and wordPrev in ("o", "oh"):
                    # 0800 hours (pronounced oh-eight-hundred)
                    strHH, strMM = divmod(int(word), 100)
                    if wordNext == "hours":
                        used += 1
                elif (
                        wordNext == "hours" and
                        word[0] != '0' and
                        (
                            int(word) < 100 or
                            int(word) > 2400
                        )):
                    # "in 3 hours"
                    hrOffset = int(word)
                    used = 2
                    isTime = False
                    hrAbs = -1
                    minAbs = -1

                elif wordNext == "minutes":
                    # "in 10 minutes"
                    minOffset = int(word)
                    used = 2
                    isTime = False
                    hrAbs = -1
                    minAbs = -1
                elif wordNext == "seconds":
                    # in 5 seconds
                    secOffset = int(word)
                    used = 2
                    isTime = False
                    hrAbs = -1
                    minAbs = -1
                elif int(word) > 100:
                    strHH, strMM = divmod(int(word), 100)
                    if wordNext == "hours":
                        used += 1
                elif wordNext.isdecimal():
                    strHH = word
                    strMM = wordNext
                    used += 1
                    if wordNextNext == "hours":
                        used += 1
                elif (
                        wordNext == "" or wordNext == "o'clock" or
                        (
                            wordNext == "in" and
                            (
                                wordNextNext == "the" or
                                wordNextNext == timeQualifier
                            )
                        )):
                    strHH = word
                    strMM = 0
                    if wordNext == "o'clock":
                        used += 1
                    if wordNext == "in" or wordNextNext == "in":
                        used += (1 if wordNext == "in" else 2)
                        # the word following the first occurrence of
                        # wordNextNext, or "" when there is none
                        if wordNextNext in words:
                            followIdx = words.index(wordNextNext) + 1
                        else:
                            followIdx = len(words)
                        wordAfter = words[followIdx] \
                            if followIdx < len(words) else ""
                        if ((wordNextNext and
                             wordNextNext in timeQualifier) or
                                (wordAfter and wordAfter in timeQualifier)):
                            if "afternoon" in (wordNextNext, wordAfter):
                                remainder = "pm"
                            if "evening" in (wordNextNext, wordAfter):
                                remainder = "pm"
                            if "morning" in (wordNextNext, wordAfter):
                                remainder = "am"
                else:
                    isTime = False

            strHH = int(strHH) if strHH else 0
            strMM = int(strMM) if strMM else 0
            strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
            strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
            if strHH > 24 or strMM > 59:
                isTime = False
                used = 0
            if isTime:
                hrAbs = strHH * 1
                minAbs = strMM * 1
                used += 1
        if used > 0:
            # removed parsed words from the sentence
            for i in range(idx, min(idx + used, len(words))):
                words[i] = ""

            if wordPrev == "o" or wordPrev == "oh":
                words[idx - 1] = ""

            if wordPrev == "early":
                hrOffset = -1
                words[idx - 1] = ""
            elif wordPrev == "late":
                hrOffset = 1
                words[idx - 1] = ""
            if idx > 0 and wordPrev in markers:
                words[idx - 1] = ""
            if idx > 1 and wordPrevPrev in markers:
                words[idx - 2] = ""

            found = True

    # check that we found a date
    if not date_found():
        return None

    if dayOffset is False:
        dayOffset = 0

    # perform date manipulation

    extractedDate = dateNow
    extractedDate = extractedDate.replace(microsecond=0,
                                          second=0,
                                          minute=0,
                                          hour=0)
    if datestr != "":
        # datestr is "<month name> [<day> [<year>]]". It is decoded by hand:
        # strptime("%B") only knows the month names of the current locale,
        # and replace() keeps the tzinfo of the anchor date.
        dateParts = datestr.split()
        month = months.index(dateParts[0]) + 1
        day = leading_int(dateParts[1]) if len(dateParts) > 1 else 1
        if hasYear and len(dateParts) > 2:
            extractedDate = extractedDate.replace(year=int(dateParts[2]),
                                                  month=month,
                                                  day=day)
        else:
            temp = extractedDate.replace(month=month, day=day)
            if not extractedDate < temp:
                # not strictly after the anchor day: use next year
                temp = temp.replace(year=temp.year + 1)
            extractedDate = temp

    if yearOffset != 0:
        extractedDate = extractedDate + relativedelta(years=yearOffset)
    if monthOffset != 0:
        extractedDate = extractedDate + relativedelta(months=monthOffset)
    if dayOffset != 0:
        extractedDate = extractedDate + relativedelta(days=dayOffset)

    if hrAbs is None and minAbs is None and default_time:
        hrAbs = default_time.hour
        minAbs = default_time.minute
    # -1 marks "a relative offset was given, no absolute time"
    if hrAbs != -1 and minAbs != -1:
        extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
                                                      minutes=minAbs or 0)
        if (hrAbs or minAbs) and datestr == "":
            if not daySpecified and dateNow > extractedDate:
                extractedDate = extractedDate + relativedelta(days=1)
    if hrOffset != 0:
        extractedDate = extractedDate + relativedelta(hours=hrOffset)
    if minOffset != 0:
        extractedDate = extractedDate + relativedelta(minutes=minOffset)
    if secOffset != 0:
        extractedDate = extractedDate + relativedelta(seconds=secOffset)
    for idx, word in enumerate(words):
        if word != "and":
            continue
        # a trailing "and" has no following word
        wordAfter = words[idx + 1] if idx + 1 < len(words) else ""
        if words[idx - 1] == "" and wordAfter == "":
            words[idx] = ""

    resultStr = " ".join(words)
    resultStr = ' '.join(resultStr.split())
    return [extractedDate, resultStr]


def is_fractional_sv(input_str, short_scale=True):
    """
    This function takes the given text and checks if it is a fraction.

    The check is case-insensitive. The endings "ars", "ar", "a" and "s"
    are stripped, in that order, before the word is looked up.

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): unused here, kept for API compatibility
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    # Lower-case once, up front, so that both the suffix stripping and the
    # list lookup work on the same string.
    word = input_str.lower()
    # e.g. "femtedelars", "femtedelar", "halva", "halvs"
    for suffix in ('ars', 'ar', 'a', 's'):
        if word.endswith(suffix):
            word = word[:-len(suffix)]

    # The position in the list gives the denominator: "hel" is 1/1,
    # "halv" is 1/2 and so on.
    aFrac = ["hel", "halv", "tredjedel", "fjärdedel", "femtedel", "sjättedel",
             "sjundedel", "åttondel", "niondel", "tiondel", "elftedel",
             "tolftedel"]
    if word in aFrac:
        return 1.0 / (aFrac.index(word) + 1)
    if word == "kvart":
        return 1.0 / 4
    if word == "trekvart":
        return 3.0 / 4

    return False


def normalize_sv(text, remove_articles=True):
    """ Swedish string normalization

    Collapses runs of whitespace and replaces the number words "noll" to
    "tjugo" with digits ("en" is treated as "ett"). The match is
    case-sensitive. remove_articles is accepted but not used.
    """
    number_words = ("noll", "ett", "två", "tre", "fyra", "fem", "sex",
                    "sju", "åtta", "nio", "tio", "elva", "tolv",
                    "tretton", "fjorton", "femton", "sexton",
                    "sjutton", "arton", "nitton", "tjugo")
    # Convert numbers into digits, e.g. "två" -> "2"
    digit_for = {name: str(value) for value, name in enumerate(number_words)}

    pieces = []
    for token in text.split():  # split() also drops extra spaces
        if token == 'en':
            token = 'ett'
        pieces.append(digit_for.get(token, token))

    return ' '.join(pieces)


class SwedishNormalizer(Normalizer):
    """Normalizer for Swedish text.

    The class body is empty, so everything is inherited unchanged from the
    imported Normalizer base class.

    TODO implement language specific normalizer
    """
